#include <iostream>
#include <vector>
using namespace std;
typedef vector<float> vec1d;
typedef vector<vector<float>> vec2d;
vec1d LUP_solve(vec2d L, vec2d U, vec1d pi, vec1d b){
int n=L.size();
vec1d x(n);
vec1d y(n);
for(int i=0; i<n; ++i){
float ly=0;
for(int j=0; j<i; ++j){
ly+=L[i][j]*y[j];
}
y[i]=b[pi[i]]-ly;
}
for(int i=n-1; i>=0; --i){
float ux=0;
for(int j=0; j<n; ++j){
ux+=U[i][j]*x[j];
}
x[i]=(y[i]-ux)/U[i][i];
}
return x;
}
void LU_Decomposition(vec2d A, vec2d& L, vec2d& U){
int n=A.size();
for(int i=0; i<n; ++i){
for(int j=0; j<n; ++j){
if(i>j) U[i][j]=0;
else if(i<j) L[i][j]=0;
else L[i][i]=1;
}
}
for(int k=0; k<n; ++k){
U[k][k]=A[k][k];
for(int i=k+1; i<n; ++i){
L[i][k]=A[i][k]/A[k][k];
U[k][i]=A[k][i];
}
for(int i=k+1; i<n; ++i){
for(int j=k+1; j<n; ++j){
A[i][j]=A[i][j]-L[i][k]-U[k][j];
}
}
}
}
int main(void){
vec2d A={
{1, 2, 0},
{3, 4, 4},
{5, 6, 3}
};
vec1d b={3, 7, 8};
vec2d L(3, vec1d(3));
vec2d U(3, vec1d(3));
LU_Decomposition(A, L, U);
vec1d P={2, 0, 1};
vec1d x=LUP_solve(L, U, P, b);
for(int i=0; i<A.size(); ++i){
for(int j=0; j<A.size(); ++j) std::cout<<L[i][j]<<' ';
std::cout<<'\n';
}
for(int i=0; i<A.size(); ++i){
for(int j=0; j<A.size(); ++j) std::cout<<U[i][j]<<' ';
std::cout<<'\n';
}
for(float xx: x) std::cout<<xx<<' ';
std::cout<<std::endl;
return 0;
}